calculate_sumstats <- function(data, ...) {
  data %>% 
    summarize(
      mean_price = mean(price),
      median_price = median(price),
      iqr_price = IQR(price),
      sd_price = sd(price),
      mean_mbps = mean(total_mbps_old),
      median_mbps = median(total_mbps_old),
      iqr_mbps = IQR(total_mbps_old),
      sd_mbps = sd(total_mbps_old),
      mean_isp = mean(num_isp),
      median_isp = median(num_isp),
      iqr_isp = IQR(num_isp),
      sd_isp = sd(num_isp),
      mean_fiber = mean(fiber), 
      mean_coaxial = mean(coaxial),
      mean_other = mean(other),
      mean_catD = mean(CatDSchool),
      .groups = "drop"
    )
}

# Function to create the LaTeX table to display ISPs by region
create_latex_table_isp_region <- function(data) {
  # Start table
  latex <- c(
    "\\begin{table}[htbp]",
    "\\centering",
    "\\caption{\\label{tab:counts} Aggregated Vendor Data by Region, 2014 and 2015}",
    "\\begin{threeparttable}",
    "\\begin{tabular}{lrrrr@{\\hspace{2em}}rrrr}",  # Added hspace for clean separation
    "\\toprule",
    " & \\multicolumn{4}{c}{Pre-Consortium (2014)} & \\multicolumn{4}{c}{Post-Consortium (2015)} \\\\",
    "ISP & N.E. & Cent. & South & N.W. & N.E. & Cent. & South & N.W. \\\\"
  )
  
  # Add midrule after headers
  latex <- c(latex, "\\midrule")
  
  # Add each row of data
  for(i in 1:nrow(data)) {
    row <- data[i,]
    row_text <- sprintf("%s & %d & %d & %d & %d & %d & %d & %d & %d \\\\",
                        row$vendor_agg,
                        row$`2014_Northeastern`,
                        row$`2014_Central`,
                        row$`2014_Southern`,
                        row$`2014_Northwestern`,
                        row$`2015_Northeastern`,
                        row$`2015_Central`,
                        row$`2015_Southern`,
                        row$`2015_Northwestern`)
    
    # Add midrule after "Other" row
    if(row$vendor_agg == "Other") {
      row_text <- c(row_text, "\\midrule")
    }
    
    latex <- c(latex, row_text)
  }
  
  # Add bottomrule and end table
  latex <- c(latex,
             "\\bottomrule",
             "\\end{tabular}",
             "\\begin{tablenotes}[flushleft]",
             # "\\item This table shows the number of school contracts by Internet Service Provider (ISP) across four regions of New Jersey in 2014 and 2015. Regions are Northeast (N.E.), Central (Cent.), South, and Northwest (N.W.).",
             "\\item Note. This table shows the number of school contracts by Internet Service Providers (ISPs)",
              "across four regions of New Jersey in 2014 and 2015. Regions are Northeast (N.E.), Central",
              "(Cent.), South, and Northwest (N.W.). ISPs with a (*) participated in the Consortium. Other",
              "Consortium participants include Aﬀiniti, Cogent, Education Network of America, Lightower,",
              "NexGen, Sunesys, xTel Communications.",
             "\\end{tablenotes}",
             "\\end{threeparttable}",
             "\\end{table}")
  
  # Return as single string with line breaks
  paste(latex, collapse = "\n")
}

# Helper function of formatting coeffficients
format_regression_coef <- function(coef, se, p_value) {
  stars <- ifelse(p_value < 0.01, "$^{***}$", 
                  ifelse(p_value < 0.05, "$^{**}$", 
                         ifelse(p_value < 0.1, "$^{*}$", "")))
  
  coef_formatted <- sprintf("%.3f", coef)
  if(coef < 0) {
    coef_formatted <- gsub("-", "$-$", coef_formatted)
  }
  
  se_formatted <- sprintf("(%.3f)", se)
  
  return(list(coef = paste0(coef_formatted, stars),
              se = se_formatted))
}

# Main function with fixed column grouping
create_latex_table <- function(models, 
                               dep_vars, 
                               model_numbers, 
                               caption = "Estimated Effect of Demand Bundling",
                               label = "tab:did",
                               controls_note = c("School Type", "ISP", "Region", "Service Type"),
                               table_note = paste("This table reports difference-in-differences estimates from equation (\\ref{eq:did})",
                                                  "under varying specifications."),
                               coef_map = NULL,
                               controls_in_columns = c(3,4),
                               column_groups = NULL) {
  
  # Default coefficient mapping if none provided
  if(is.null(coef_map)) {
    coef_map <- c(
      "DFALSE" = "Non-participant",
      "DTRUE" = "Participant",
      "Time" = "Post-Consortium",
      "DTRUE:Time" = "Participant x Post-Consortium",
      "num_isp" = "Number of ISPs"
    )
  }
  
  # Create checkmark string based on which columns have controls
  create_control_row <- function(control_name) {
    row_values <- rep("", 4)  
    row_values[controls_in_columns] <- "$\\checkmark$"  
    sprintf("%s & %s \\\\", control_name, paste(row_values, collapse = " & "))
  }
  
  # Start LaTeX table
  latex_code <- c(
    "\\begin{table}[t!!]",
    "\t\\centering",
    sprintf("\t\\caption{%s}\\label{%s}", caption, label),
    "\t\t\\begin{adjustbox}{scale=0.9}",
    "\t\t\\begin{threeparttable}",
    "\t\\begin{tabular}{@{\\extracolsep{5pt}}lcccc}",
    "\\\\[-1.8ex]\\toprule"
  )
  
  # Add column groups if provided
  if (!is.null(column_groups)) {
    # Create the group headers row
    cols <- character(4)  # Initialize with empty strings
    for (group in column_groups) {
      span <- group$columns[length(group$columns)] - group$columns[1] + 1
      cols[group$columns[1]] <- sprintf("\\multicolumn{%d}{c}{%s}", span, group$name)
    }
    latex_code <- c(latex_code,
                    sprintf(" & %s \\\\", paste(cols[cols != ""], collapse = " & ")))
    
    # Add clines for each group
    clines <- sapply(column_groups, function(group) {
      sprintf("\\cline{%d-%d}", group$columns[1] + 1, group$columns[length(group$columns)] + 1)
    })
    latex_code <- c(latex_code, paste(clines, collapse = " "))
  }
  
  # Add variable names and model numbers
  latex_code <- c(latex_code,
                  sprintf("\\\\[-1.8ex] & %s\\\\", paste(dep_vars, collapse = " & ")),
                  sprintf("\\\\[-1.8ex] & %s\\\\", paste(paste0("(", model_numbers, ")"), collapse = " & ")),
                  "\\hline \\\\[-1.8ex]"
  )
  
  # Rest of the function remains the same...
  
  # Get display names we want to show in order
  display_names <- c("Non-participant", "Participant", "Post-Consortium", 
                     "Participant x Post-Consortium", "Number of ISPs")
  
  # Modified coefficient section to handle both mapped and direct names
  for(display_name in display_names) {
    row_values <- c()
    for(model in models) {
      # Check if the display name exists directly in the model
      if(display_name %in% names(coef(model))) {
        idx <- which(names(coef(model)) == display_name)
        coef_val <- coef(model)[idx]
        se_val <- sqrt(diag(vcov(model)))[idx]
        p_val <- 2 * (1 - pt(abs(coef_val/se_val), df = model$df.residual))
        formatted <- format_regression_coef(coef_val, se_val, p_val)
        row_values <- c(row_values, 
                        formatted$coef,
                        formatted$se)
      } else {
        # Try the mapping approach if direct name not found
        actual_coef_name <- names(coef_map)[coef_map == display_name]
        if(any(actual_coef_name %in% names(coef(model)))) {
          idx <- which(names(coef(model)) %in% actual_coef_name)
          coef_val <- coef(model)[idx]
          se_val <- sqrt(diag(vcov(model)))[idx]
          p_val <- 2 * (1 - pt(abs(coef_val/se_val), df = model$df.residual))
          formatted <- format_regression_coef(coef_val, se_val, p_val)
          row_values <- c(row_values, 
                          formatted$coef,
                          formatted$se)
        } else {
          row_values <- c(row_values, "", "")
        }
      }
    }
    
    latex_code <- c(latex_code,
                    sprintf("%s & %s \\\\", display_name, 
                            paste(row_values[seq(1, length(row_values), 2)], collapse = " & ")),
                    sprintf("& %s \\\\", 
                            paste(row_values[seq(2, length(row_values), 2)], collapse = " & ")),
                    "& & & & \\\\")
  }
  
  # Add control variable indicators if provided
  if(!is.null(controls_note)) {
    latex_code <- c(latex_code, "\\midrule")
    for(control in controls_note) {
      latex_code <- c(latex_code, create_control_row(control))
    }
  }
  
  # Add model statistics
  latex_code <- c(latex_code,
                  "\\\\[-1.8ex]",
                  sprintf("Observations & %s \\\\", 
                          paste(sapply(models, function(m) length(m$fitted.values)), collapse = " & "))
  )
  
  # Close table
  latex_code <- c(latex_code,
                  "\\bottomrule",
                  "\\end{tabular}",
                  "\\begin{tablenotes}[flushleft]",
                  sprintf("\\item \\footnotesize \\emph{Note:} %s\\\\ $^{**}$p$<$0.05; $^{***}$p$<$0.01.", table_note),
                  "\\end{tablenotes}",
                  "\\end{threeparttable}",
                  "\\end{adjustbox}",
                  "\\end{table}"
  )
  
  return(paste(latex_code, collapse = "\n"))
}


#' Create a professional plot with error bars, using existing color keys
#'
#' @param data The dataset containing the plot data
#' @param x_var Name of x-axis variable (default: "Scale")
#' @param y_var Name of y-axis variable
#' @param color_var Name of color variable (default: "colorkey")
#' @param lower_bound Name of lower bound variable (default: "Lower_Bound")
#' @param upper_bound Name of upper bound variable (default: "Upper_Bound")
#' @param x_label X-axis label (default: "Degree of Violation")
#' @param y_label Y-axis label
#' @param plot_title Optional plot title
#' @param base_size Base font size for theme (default: 14)
#' @return A ggplot object
create_professional_plot <- function(
    data,
    x_var = "Scale",
    y_var,
    color_var = "colorkey",
    lower_bound = "Lower_Bound",
    upper_bound = "Upper_Bound",
    x_label = expression(atop("Deviation from Parallel Trends (g)",
                                      atop(italic("(g = 1.0 implies parallel trends)"), ""))),
    y_label,
    plot_title = NULL,
    base_size = 24
) {
  
  # Create the basic plot
  p <- ggplot(
    data,
    aes(
      x = .data[[x_var]],
      y = .data[[y_var]],
      color = .data[[color_var]]
    )
  ) +
    # Points and error bars
    geom_point(size = 4, alpha = 0.9) +
    geom_errorbar(
      aes(
        ymin = .data[[lower_bound]],
        ymax = .data[[upper_bound]]
      ),
      width = 0.05,
      size = 0.8,
      alpha = 0.8
    ) +
    # Reference line
    geom_hline(
      yintercept = 0,
      linetype = "dashed",
      color = "gray30",
      size = 0.5
    ) +
    # Labels
    labs(
      x = x_label,
      y = y_label,
      # title = plot_title,
      # caption = "Note: Error bars represent 95% confidence intervals"
    ) +
    # Theme customization
    theme_minimal(base_size = base_size) +
    theme(
      text = element_text(family = "serif"),
      # Text elements
      plot.title = element_text(
        size = base_size + 2,
        face = "bold",
        margin = margin(b = 20),
        color = "gray10"
      ),
      axis.title = element_text(
        size = base_size - 2,
        face = "bold",
        color = "gray10"
      ),
      axis.text = element_text(
        size = base_size - 4,
        color = "gray20"
      ),
      # Grid lines
      # panel.grid.major = element_line(color = "gray85", size = 0.3),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      # Plot margins
      plot.margin = margin(t = 20, r = 20, b = 20, l = 20),
      # Remove legend
      legend.position = "none",
      # Add subtle border
      panel.border = element_rect(color = "gray50", fill = NA, size = 0.5)
    ) +
    # Scale customization
    scale_y_continuous(
      labels = scales::number_format(accuracy = 0.01),
      breaks = pretty_breaks(n = 8)
    ) +
    scale_x_continuous(breaks = pretty_breaks(n = 6)) +
    # Use the actual colors specified in colorkey
    scale_color_identity()
  
  return(p)
}

#' Save a plot with consistent settings
#'
#' @param plot The ggplot object to save
#' @param filename The filename to save to
#' @param width Plot width in inches (default: 8)
#' @param height Plot height in inches (default: 6)
#' @param dpi Resolution in dots per inch (default: 300)
save_professional_plot <- function(
    plot,
    filename,
    width = 8,
    height = 6,
    dpi = 300
) {
  ggsave(
    filename = filename,
    plot = plot,
    width = width,
    height = height,
    dpi = dpi,
    device = cairo_ps
  )
}